from bs4 import BeautifulSoup
import csv, os
# Directory that holds the yearly Biblio<year>.xml files. It is also made the
# working directory, so the three output CSVs opened below land here too.
BIBLIO_DIR = "D:\\KIPRIS\\Biblio"
XML_PATH_TEMPLATE = BIBLIO_DIR + "\\Biblio{}.xml"
os.chdir(BIBLIO_DIR)

# Every yearly file is split on this declaration; each piece after the first
# is parsed as one standalone XML document.
XML_DECLARATION = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'


def _tag_text(node, tag):
    """Return the text of the first ``tag`` element found under ``node``.

    ``node.find`` returns ``None`` when the element is absent. In that case an
    empty string is returned so the record still produces a row (with an
    empty cell) instead of aborting the whole run with ``AttributeError``.
    """
    found = node.find(tag)
    return found.text if found is not None else ""


# Open the three outputs in one ``with`` so they are flushed and closed even
# if parsing fails part-way through. newline="" is what the csv module
# expects for files it writes to.
with open("assignee_8417_2020.csv", "w", encoding="utf-8", newline="") as assignee_file, \
        open("basic_8417_2020.csv", "w", encoding="utf-8", newline="") as basic_file, \
        open("ipc_8417_2020.csv", "w", encoding="utf-8", newline="") as ipc_file:
    assignee_writer = csv.writer(assignee_file)
    basic_writer = csv.writer(basic_file)
    ipc_writer = csv.writer(ipc_file)

    for year in range(1984, 2018):
        # Read the whole yearly file and close it immediately.
        with open(XML_PATH_TEMPLATE.format(year), "r", encoding="utf-8") as xml_file:
            txt = xml_file.read().replace("\n", "")

        # Element [0] is whatever precedes the first declaration; skip it.
        for xml in txt.split(XML_DECLARATION)[1:]:
            biblio = BeautifulSoup(xml, 'xml')

            # Biblio: one row per document.
            appnum = _tag_text(biblio, "applicationNumber")
            basic_writer.writerow([
                appnum,
                _tag_text(biblio, "applicationDate"),
                _tag_text(biblio, "openNumber"),
                _tag_text(biblio, "openDate"),
                _tag_text(biblio, "registerNumber"),
                _tag_text(biblio, "registerDate"),
                _tag_text(biblio, "claimCount"),
                _tag_text(biblio, "finalDisposal"),
            ])

            # Applicant: one row per applicantInfo element, numbered from 1
            # in document order.
            for order, applicant in enumerate(biblio.find_all("applicantInfo"), start=1):
                assignee_writer.writerow([
                    appnum,
                    order,
                    _tag_text(applicant, "engName"),
                    _tag_text(applicant, "code"),
                    _tag_text(applicant, "country"),
                    _tag_text(applicant, "address"),
                ])

            # IPC: one row per ipcInfo element, numbered from 1 in document
            # order.
            for order, ipc in enumerate(biblio.find_all("ipcInfo"), start=1):
                ipc_writer.writerow([
                    appnum,
                    order,
                    _tag_text(ipc, "ipcNumber"),
                    _tag_text(ipc, "ipcDate"),
                ])

            # Progress indicator: echo each processed application number.
            print(appnum)